---
title: "Replication Materials"
subtitle: "Pipeline Diversity via Career Diversity: Lessons from a Research Experience for Undergraduates (REU) Program"
author: "Jennifer Barnes, Emily Hencken Ritter, Sharece Thrower, Alexander Tripp, Elizabeth J. Zechmeister"
output: html_document
---

```{r set up}
# Attach dplyr; it supplies `%>%` and `filter()` used in the program
# component ratings chunk.
#
# The former `rm(list = ls())` call was dropped on purpose: it wipes the
# caller's workspace when chunks are run interactively, yet it does not
# detach packages or reset options, so it never produced a truly clean
# session. Every object used in this document is assigned before it is read,
# so nothing here depends on the workspace being empty.
library(dplyr)
```

# Reading in REU and Non-REU undergraduate survey data

```{r data}
# Load the three cleaned survey extracts. The paths are relative, so the CSV
# files must sit in the working directory used when the document is run.
reu_pre <- read.csv(file = "reu_pre_cleaned.csv") # REU, pre-program
reu_post <- read.csv(file = "reu_post_cleaned.csv") # REU, post-program
undergrad_survey <- read.csv(file = "undergrad_cleaned.csv") # non-REU
```


# Demographic questions 

```{r}
# Descriptive statistics for the pre-program REU sample. The number after
# each call is the value recorded by the authors.

# Federal Work Study participation
mean(reu_pre$q1) # 0.2

# Pell Grants
mean(reu_pre$q2) # 0.44

# First-generation college student, reported as the complement of mean(q3)
# NOTE(review): presumably q3 = 1 when a parent attended college -- confirm
# against the codebook.
1 - mean(reu_pre$q3) # 0.48

# Mentorship experience
mean(reu_pre$q8) # 0.4

# Number of mentors (missing responses are dropped)
mean(reu_pre$q8a, na.rm = TRUE) # 1.7

# Year in college
table(reu_pre$student_status)
# 4 = Rising 4th year college undergraduate


# gender, ethnicity, public/private (no code for these in this chunk)
```


# Comparing pre-program REU students to Non-REU undergraduate students 

## Barriers to graduate education

```{r}
# Pre-program REU respondents (x) versus non-REU undergraduates (y), one
# two-sided two-sample t-test per survey item. `var.equal` and `paired` are
# left at their defaults, so each call is an unpaired Welch test.

# Work study
t.test(
  x = reu_pre$q1,
  y = undergrad_survey$q1,
  alternative = "two.sided"
)

# Pell grants
t.test(
  x = reu_pre$q2,
  y = undergrad_survey$q2,
  alternative = "two.sided"
)

# Parents' college
t.test(
  x = reu_pre$q3,
  y = undergrad_survey$q3,
  alternative = "two.sided"
)

# Negative work resps
t.test(
  x = reu_pre$q4,
  y = undergrad_survey$q4,
  alternative = "two.sided"
)

# Negative family resps
t.test(
  x = reu_pre$q5,
  y = undergrad_survey$q5,
  alternative = "two.sided"
)

# Mentors
t.test(
  x = reu_pre$q8,
  y = undergrad_survey$q8,
  alternative = "two.sided"
)

# Number of mentors
t.test(
  x = reu_pre$q8a,
  y = undergrad_survey$q8a,
  alternative = "two.sided"
)

# Confident costs
t.test(
  x = reu_pre$q7_1,
  y = undergrad_survey$q7_1,
  alternative = "two.sided"
)

# Worry tuition
t.test(
  x = reu_pre$q7_2,
  y = undergrad_survey$q7_2,
  alternative = "two.sided"
)

# Worry cost of living
t.test(
  x = reu_pre$q7_3,
  y = undergrad_survey$q7_3,
  alternative = "two.sided"
)

# Confident financial support
t.test(
  x = reu_pre$q7_4,
  y = undergrad_survey$q7_4,
  alternative = "two.sided"
)

# Conversations grad school
t.test(
  x = reu_pre$q8_1,
  y = undergrad_survey$q8_1,
  alternative = "two.sided"
)

# Don't know backgrounds
t.test(
  x = reu_pre$q8_2,
  y = undergrad_survey$q8_2,
  alternative = "two.sided"
)

# Background valued
t.test(
  x = reu_pre$q10_1,
  y = undergrad_survey$q10_1,
  alternative = "two.sided"
)

# Background discrim
t.test(
  x = reu_pre$q10_2,
  y = undergrad_survey$q10_2,
  alternative = "two.sided"
)

# Not prepared
t.test(
  x = reu_pre$q12_1,
  y = undergrad_survey$q12_1,
  alternative = "two.sided"
)

# Technical skills
t.test(
  x = reu_pre$q12_2,
  y = undergrad_survey$q12_2,
  alternative = "two.sided"
)

# No study skills
t.test(
  x = reu_pre$q12_3,
  y = undergrad_survey$q12_3,
  alternative = "two.sided"
)

# Sufficient info for applications to grad programs
t.test(
  x = reu_pre$q13_1,
  y = undergrad_survey$q13_1,
  alternative = "two.sided"
)

# Don't know what a grad program in social sciences would be like
t.test(
  x = reu_pre$q13_2,
  y = undergrad_survey$q13_2,
  alternative = "two.sided"
)
```

## Outcomes

```{r}
# Outcome measures: pre-program REU respondents (x) versus non-REU
# undergraduates (y), two-sided unpaired t-tests. The figures in each label
# are the group means recorded by the authors.

# First-choice PhD: REU students 52%, non-REU 3%
t.test(
  x = reu_pre$firstchoicephd,
  y = undergrad_survey$firstchoicephd,
  alternative = "two.sided"
)

# First-choice MA or PhD: REU students 72%, non-REU 27%
t.test(
  x = reu_pre$firstchoicemaphd,
  y = undergrad_survey$firstchoicemaphd,
  alternative = "two.sided"
)

# Social science PhD: REU students 8.5, non-REU 5
t.test(
  x = reu_pre$q14_1,
  y = undergrad_survey$q14_1,
  alternative = "two.sided"
)

# Research career: REU students 8, non-REU 5.8
t.test(
  x = reu_pre$q15_1,
  y = undergrad_survey$q15_1,
  alternative = "two.sided"
)

# Any advanced degree: REU students 9.6, non-REU 8.6
t.test(
  x = reu_pre$q16_1,
  y = undergrad_survey$q16_1,
  alternative = "two.sided"
)

# Career paths available: REU students 5.7, non-REU 6
t.test(
  x = reu_pre$q13_3,
  y = undergrad_survey$q13_3,
  alternative = "two.sided"
)
```

# Comparing post-program REU students to Non-REU undergraduate students

## Barriers to graduate education

```{r}
# Post-program REU respondents (x) versus non-REU undergraduates (y), one
# two-sided two-sample t-test per survey item. `var.equal` and `paired` are
# left at their defaults, so each call is an unpaired Welch test.

# Mentors
t.test(
  x = reu_post$q8,
  y = undergrad_survey$q8,
  alternative = "two.sided"
)

# Number of mentors
t.test(
  x = reu_post$q8a,
  y = undergrad_survey$q8a,
  alternative = "two.sided"
)

# Confident costs
t.test(
  x = reu_post$q7_1,
  y = undergrad_survey$q7_1,
  alternative = "two.sided"
)

# Worry tuition
t.test(
  x = reu_post$q7_2,
  y = undergrad_survey$q7_2,
  alternative = "two.sided"
)

# Worry cost of living
t.test(
  x = reu_post$q7_3,
  y = undergrad_survey$q7_3,
  alternative = "two.sided"
)

# Confident financial support
t.test(
  x = reu_post$q7_4,
  y = undergrad_survey$q7_4,
  alternative = "two.sided"
)

# Conversations grad school
t.test(
  x = reu_post$q8_1,
  y = undergrad_survey$q8_1,
  alternative = "two.sided"
)

# Don't know backgrounds
t.test(
  x = reu_post$q8_2,
  y = undergrad_survey$q8_2,
  alternative = "two.sided"
)

# Background valued
t.test(
  x = reu_post$q10_1,
  y = undergrad_survey$q10_1,
  alternative = "two.sided"
)

# Background discrim
t.test(
  x = reu_post$q10_2,
  y = undergrad_survey$q10_2,
  alternative = "two.sided"
)

# Not prepared
t.test(
  x = reu_post$q12_1,
  y = undergrad_survey$q12_1,
  alternative = "two.sided"
)

# Technical skills
t.test(
  x = reu_post$q12_2,
  y = undergrad_survey$q12_2,
  alternative = "two.sided"
)

# No study skills
t.test(
  x = reu_post$q12_3,
  y = undergrad_survey$q12_3,
  alternative = "two.sided"
)

# Sufficient info for applications to grad programs
t.test(
  x = reu_post$q13_1,
  y = undergrad_survey$q13_1,
  alternative = "two.sided"
)

# Don't know what a grad program in social sciences would be like
t.test(
  x = reu_post$q13_2,
  y = undergrad_survey$q13_2,
  alternative = "two.sided"
)

```

## Outcomes

```{r}
# Outcome measures: post-program REU respondents (x) versus non-REU
# undergraduates (y), two-sided unpaired t-tests. The figures in each label
# are the group means recorded by the authors.

# First-choice PhD: REU students 74%, non-REU 3%
t.test(
  x = reu_post$firstchoicephd,
  y = undergrad_survey$firstchoicephd,
  alternative = "two.sided"
)

# First-choice MA or PhD: REU students 78%, non-REU 27%
t.test(
  x = reu_post$firstchoicemaphd,
  y = undergrad_survey$firstchoicemaphd,
  alternative = "two.sided"
)

# Social science PhD: REU students 8.8, non-REU 5
t.test(
  x = reu_post$q14_1,
  y = undergrad_survey$q14_1,
  alternative = "two.sided"
)

# Research career: REU students 8.1, non-REU 5.8
t.test(
  x = reu_post$q15_1,
  y = undergrad_survey$q15_1,
  alternative = "two.sided"
)

# Any advanced degree: REU students 9.7, non-REU 8.6
t.test(
  x = reu_post$q16_1,
  y = undergrad_survey$q16_1,
  alternative = "two.sided"
)

# Career paths available: REU students 1.9, non-REU 6
t.test(
  x = reu_post$q13_3,
  y = undergrad_survey$q13_3,
  alternative = "two.sided"
)
```

# Comparing pre- and post-program REU students

## Barriers to graduate education

```{r}
# Pre-program (x) versus post-program (y) REU responses, one two-sided
# t-test per survey item.
# NOTE(review): `paired` is left at its default, so the two waves are treated
# as independent samples (Welch test) -- confirm this is intended.

# Mentors
t.test(
  x = reu_pre$q8,
  y = reu_post$q8,
  alternative = "two.sided"
)

# Number of mentors
t.test(
  x = reu_pre$q8a,
  y = reu_post$q8a,
  alternative = "two.sided"
)

# Confident costs
t.test(
  x = reu_pre$q7_1,
  y = reu_post$q7_1,
  alternative = "two.sided"
)

# Worry tuition
t.test(
  x = reu_pre$q7_2,
  y = reu_post$q7_2,
  alternative = "two.sided"
)

# Worry cost of living
t.test(
  x = reu_pre$q7_3,
  y = reu_post$q7_3,
  alternative = "two.sided"
)

# Confident financial support
t.test(
  x = reu_pre$q7_4,
  y = reu_post$q7_4,
  alternative = "two.sided"
)

# Conversations grad school
t.test(
  x = reu_pre$q8_1,
  y = reu_post$q8_1,
  alternative = "two.sided"
)

# Don't know backgrounds
t.test(
  x = reu_pre$q8_2,
  y = reu_post$q8_2,
  alternative = "two.sided"
)

# Background valued
t.test(
  x = reu_pre$q10_1,
  y = reu_post$q10_1,
  alternative = "two.sided"
)

# Background discrim
t.test(
  x = reu_pre$q10_2,
  y = reu_post$q10_2,
  alternative = "two.sided"
)

# Not prepared
t.test(
  x = reu_pre$q12_1,
  y = reu_post$q12_1,
  alternative = "two.sided"
)

# Technical skills
t.test(
  x = reu_pre$q12_2,
  y = reu_post$q12_2,
  alternative = "two.sided"
)

# No study skills
t.test(
  x = reu_pre$q12_3,
  y = reu_post$q12_3,
  alternative = "two.sided"
)

```

## Outcomes

```{r}
# Outcome measures: pre-program (x) versus post-program (y) REU responses,
# two-sided t-tests.
# NOTE(review): `paired` is left at its default, so the two waves are treated
# as independent samples (Welch test) -- confirm this is intended.

# First choice PhD
t.test(
  x = reu_pre$firstchoicephd,
  y = reu_post$firstchoicephd,
  alternative = "two.sided"
)

# First choice PhD or MA
t.test(
  x = reu_pre$firstchoicemaphd,
  y = reu_post$firstchoicemaphd,
  alternative = "two.sided"
)

# Social science PhD
t.test(
  x = reu_pre$q14_1,
  y = reu_post$q14_1,
  alternative = "two.sided"
)

# Research careers
t.test(
  x = reu_pre$q15_1,
  y = reu_post$q15_1,
  alternative = "two.sided"
)

# Any advanced degree
t.test(
  x = reu_pre$q16_1,
  y = reu_post$q16_1,
  alternative = "two.sided"
)

# Career paths
t.test(
  x = reu_pre$q13_3,
  y = reu_post$q13_3,
  alternative = "two.sided"
)
```

# Program component ratings of post-program REU students

```{r}
# Mean rating of each program component among post-program REU respondents,
# reported per cohort and pooled over both cohorts. Missing ratings are
# dropped (`na.rm = TRUE`). The number after each call is the value recorded
# by the authors.
#
# The same component is read from different peq2_* columns in the two years
# (e.g. career panels: peq2_3 in 2023, peq2_1 in 2024).
# NOTE(review): presumably the questionnaire numbering differed by year --
# confirm the column mapping against the survey instruments.

# Split the post-program responses by cohort year
post2023 <- reu_post %>% filter(year == 2023)
post2024 <- reu_post %>% filter(year == 2024)

# career panels rating

mean(post2023$peq2_3, na.rm = TRUE) # 2023: 8.36
mean(post2024$peq2_1, na.rm = TRUE) # 2024: 8.40

mean(c(post2023$peq2_3, post2024$peq2_1), na.rm = TRUE) # 8.38

# inside the box workshop rating

mean(post2023$peq2_4, na.rm = TRUE) # 2023: 6.90
mean(post2024$peq2_2, na.rm = TRUE) # 2024: 7.60

mean(c(post2023$peq2_4, post2024$peq2_2), na.rm = TRUE) # 7.25

# classes rating

mean(post2023$peq2_5, na.rm = TRUE) # 2023: 8.27
mean(post2024$peq2_3, na.rm = TRUE) # 2024: 8.20

mean(c(post2023$peq2_5, post2024$peq2_3), na.rm = TRUE) # 8.24

# primary mentor relationship rating

mean(post2023$peq2_6, na.rm = TRUE) # 2023: 9.09
mean(post2024$peq2_4, na.rm = TRUE) # 2024: 9.00

mean(c(post2023$peq2_6, post2024$peq2_4), na.rm = TRUE) # 9.05

# other mentor relationships rating

mean(post2023$peq2_7, na.rm = TRUE) # 2023: 8.55
mean(post2024$peq2_5, na.rm = TRUE) # 2024: 9.10

mean(c(post2023$peq2_7, post2024$peq2_5), na.rm = TRUE) # 8.81

# research project rating

mean(post2023$peq2_8, na.rm = TRUE) # 2023: 8.91
mean(post2024$peq2_6, na.rm = TRUE) # 2024: 9.30

mean(c(post2023$peq2_8, post2024$peq2_6), na.rm = TRUE) # 9.10

# research assistantship rating

mean(post2023$peq2_9, na.rm = TRUE) # 2023: 8.27
mean(post2024$peq2_7, na.rm = TRUE) # 2024: 7.30

mean(c(post2023$peq2_9, post2024$peq2_7), na.rm = TRUE) # 7.81

# poster session rating

mean(post2023$peq2_10, na.rm = TRUE) # 2023: 9.09
mean(post2024$peq2_8, na.rm = TRUE) # 2024: 8.75

mean(c(post2023$peq2_10, post2024$peq2_8), na.rm = TRUE) # 8.95

# student life panel rating

mean(post2023$peq2_11, na.rm = TRUE) # 2023: 7.27
mean(post2024$peq2_9, na.rm = TRUE) # 2024: 8.33

mean(c(post2023$peq2_11, post2024$peq2_9), na.rm = TRUE) # 7.75

# office hours rating

mean(post2023$peq2_12, na.rm = TRUE) # 2023: 8.18
mean(post2024$peq2_10, na.rm = TRUE) # 2024: 8.00

mean(c(post2023$peq2_12, post2024$peq2_10), na.rm = TRUE) # 8.10

```


```{r}

```

